#import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.4f' % x)
import seaborn as sns
sns.set_context('paper', font_scale=1.3)
sns.set_style('white')
import warnings
warnings.filterwarnings('ignore')
from time import time
import matplotlib.ticker as tkr
from scipy import stats
from statsmodels.tsa.stattools import adfuller
from sklearn import preprocessing
from statsmodels.tsa.stattools import pacf
%matplotlib inline
import math
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from keras.callbacks import EarlyStopping
#uncomment if using the outdated dataset
# #create a for loop that reads all sheets from the excel file and updates the column headers
# x = [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
# for i in x:
# globals()[f"platts{i}"] = pd.read_excel('Platts Historical.xlsx', sheet_name=f"{i}")
# globals()[f"platts{i}"].columns = globals()[f"platts{i}"].iloc[0]
# globals()[f"platts{i}"] = globals()[f"platts{i}"].iloc[2:]
# globals()[f"platts{i}"].reset_index(inplace=True, drop=True)
# #concatenate all of the created dataframes into one big dataframe
# df_comp = pd.concat([platts2009, platts2010, platts2011, platts2012, platts2013, platts2014, platts2015, platts2016, platts2017, platts2018, platts2019, platts2020, platts2021], ignore_index=True, axis=0)
# df_comp.head()
# df_comp = df_comp.rename(columns={np.nan: 'Date'})
# df_comp.head()
# #save the one big dataframe into a csv file for checkpoint and later calling
# df_comp.to_csv('mergedplatts.csv', index = False)
#uncomment if using the outdated dataset
# df = pd.read_csv('mergedplatts.csv')
# df.head()
from google.colab import drive
drive.mount('/content/drive')
# Load the daily petroleum price workbook; keep an untouched copy for later use.
df = pd.read_excel("/content/drive/My Drive/Decisions Data Dump/Platts/Petroleum Data.xlsx")
df1 = df.copy()
df.head()
# Calendar features via the vectorized .dt accessor (replaces the original
# per-row apply(lambda ...) calls -- same values, much faster).
df['Date'] = pd.to_datetime(df['Date'])
df['year'] = df['Date'].dt.year
df['quarter'] = df['Date'].dt.quarter
df['month'] = df['Date'].dt.month
df['day'] = df['Date'].dt.day
df.head()
df = df.reset_index(drop=True)
df['weekday'] = df['Date'].dt.weekday
df['weekday'] = (df['weekday'] < 5).astype(int)  # 1 = Mon-Fri, 0 = weekend
print(df.shape)
print(df.Date.min())
print(df.Date.max())
df.tail(5)
print('The time series starts from: ', df.Date.min())
print('The time series ends on: ', df.Date.max())
# (superseded) per-product extraction using the raw Platts symbol columns
# gasoline = df[["Date","PGAEY00", "year", "quarter", "month", "day"]].dropna().rename(columns={"PGAEY00": "Price"})
# print(gasoline.shape)
# gasoline.tail()
# diesel = df[["Date", "AAPPF00", "year", "quarter", "month", "day"]].dropna().rename(columns={"AAPPF00": "Price"})
# print(diesel.shape)
# diesel.head()
# g1=gasoline.loc[:,['Date','Price']]
# g1.set_index('Date', inplace=True)
# d1=diesel.loc[:,['Date','Price']]
# d1.set_index('Date', inplace=True)
df.set_index('Date', inplace=True)
# Overlay the three daily price series on a single axis.
df.Gasoline.plot(figsize = (25,5), label='Gasoline', color='#e58a3d', linewidth=1)
df.Diesel.plot(figsize = (25,5), label='Diesel', color='#3f5b9c', linewidth=1)
df['Jet Fuel'].plot(figsize = (25,5), label='Jet Fuel', color='#c24e5d', linewidth=1)
plt.ylabel('Price')
plt.legend()
plt.tight_layout()
plt.title('Diesel vs Gasoline vs Jet Fuel Prices', size=20)
sns.despine(top=True)
plt.show();
# Yearly and quarterly box plots for each product (3x2 grid:
# rows = Diesel, Gasoline, Jet Fuel; columns = by year, by quarter).
plt.figure(figsize=(15,10))
plt.subplot(3,2,1)
plt.subplots_adjust(wspace=0.2)
sns.boxplot(x='year', y='Diesel', data=df)
plt.xlabel('year')
plt.title('Box plot of Diesel Yearly Price')
sns.despine(left=True)
plt.tight_layout()
plt.subplot(3,2,2)
sns.boxplot(x='quarter', y='Diesel', data=df)
plt.xlabel('quarter')
plt.title('Box plot of Diesel Quarterly Price')
sns.despine(left=True)
plt.tight_layout()
plt.subplot(3,2,3)
plt.subplots_adjust(wspace=0.2)
sns.boxplot(x='year', y='Gasoline', data=df)
plt.xlabel('year')
plt.title('Box plot of Gasoline Yearly Price')
sns.despine(left=True)
plt.tight_layout()
plt.subplot(3,2,4)
sns.boxplot(x='quarter', y='Gasoline', data=df)
plt.xlabel('quarter')
plt.title('Box plot of Gasoline Quarterly Price')
sns.despine(left=True)
plt.tight_layout();
plt.subplot(3,2,5)
plt.subplots_adjust(wspace=0.2)
sns.boxplot(x='year', y='Jet Fuel', data=df)
plt.xlabel('year')
plt.title('Box plot of Jet Fuel Yearly Price')
sns.despine(left=True)
plt.tight_layout()
plt.subplot(3,2,6)
sns.boxplot(x='quarter', y='Jet Fuel', data=df)
plt.xlabel('quarter')
plt.title('Box plot of Jet Fuel Quarterly Price')
sns.despine(left=True)
plt.tight_layout();
# Histogram + normal Q-Q (probability) plot for each product, 3x2 grid.
plt.figure(figsize=(14,20))
plt.subplot(3,2,1)
plt.subplots_adjust(wspace=0.2)
df.Diesel.hist(bins=50, color='#3f5b9c')
plt.title('Diesel Price Distribution')
plt.subplot(3,2,2)
stats.probplot(df.Diesel, plot=plt)
df.Diesel.describe().T  # summary only renders in a notebook cell
plt.subplot(3,2,3)
plt.subplots_adjust(wspace=0.2)
df.Gasoline.hist(bins=50, color='#e58a3d')
plt.title('Gasoline Price Distribution')
plt.subplot(3,2,4)
stats.probplot(df.Gasoline, plot=plt)
df.Gasoline.describe().T  # summary only renders in a notebook cell
plt.subplot(3,2,5)
plt.subplots_adjust(wspace=0.2)
df['Jet Fuel'].hist(bins=50, color='#c24e5d')
plt.title('Jet Fuel Price Distribution')
plt.subplot(3,2,6)
stats.probplot(df['Jet Fuel'], plot=plt)
df['Jet Fuel'].describe().T  # summary only renders in a notebook cell
# Mean Diesel price at six resampling frequencies (day, week, month,
# quarter, year, semi-month end), all sharing the x-axis of the first panel.
fig= plt.figure(figsize=(18,16))
fig.subplots_adjust(hspace=.4)
ax1= fig.add_subplot(6,1,1)
ax1.plot(df.Diesel.resample('D').mean(), linewidth=2, color='#3f5b9c')
ax1.set_title('Mean Diesel Price resampled over day')
ax2= fig.add_subplot(6,1,2, sharex=ax1)
ax2.plot(df.Diesel.resample('W').mean(), linewidth=2, color='#3f5b9c')
ax2.set_title('Mean Diesel Price resampled over week')
ax2.tick_params(axis='both', which='major')
ax3= fig.add_subplot(6,1,3, sharex=ax1)
ax3.plot(df.Diesel.resample('M').mean(), linewidth=2, color='#3f5b9c')
ax3.set_title('Mean Diesel Price resampled over month')
ax3.tick_params(axis='both', which='major')
ax4= fig.add_subplot(6,1,4, sharex=ax1)
ax4.plot(df.Diesel.resample('Q').mean(), linewidth=2, color='#3f5b9c')
ax4.set_title('Mean Diesel Price resampled over quarter')
ax4.tick_params(axis='both', which='major')
ax5= fig.add_subplot(6,1,5, sharex=ax1)
ax5.plot(df.Diesel.resample('A').mean(), linewidth=2, color='#3f5b9c')
ax5.set_title('Mean Diesel Price resampled over year')
ax5.tick_params(axis='both', which='major')
ax6= fig.add_subplot(6,1,6, sharex=ax1)
ax6.plot(df.Diesel.resample('SM').mean(), linewidth=2, color='#3f5b9c')
ax6.set_title('Mean Diesel Price resampled over semi-month end')
ax6.tick_params(axis='both', which='major')
# Mean Gasoline price at six resampling frequencies; same layout as the
# Diesel figure above.
fig= plt.figure(figsize=(18,16))
fig.subplots_adjust(hspace=.4)
ax1= fig.add_subplot(6,1,1)
ax1.plot(df.Gasoline.resample('D').mean(), linewidth=2, color='#e58a3d')
ax1.set_title('Mean Gasoline Price resampled over day')
ax2= fig.add_subplot(6,1,2, sharex=ax1)
ax2.plot(df.Gasoline.resample('W').mean(), linewidth=2, color='#e58a3d')
ax2.set_title('Mean Gasoline Price resampled over week')
ax2.tick_params(axis='both', which='major')
ax3= fig.add_subplot(6,1,3, sharex=ax1)
ax3.plot(df.Gasoline.resample('M').mean(), linewidth=2, color='#e58a3d')
ax3.set_title('Mean Gasoline Price resampled over month')
ax3.tick_params(axis='both', which='major')
ax4= fig.add_subplot(6,1,4, sharex=ax1)
ax4.plot(df.Gasoline.resample('Q').mean(), linewidth=2, color='#e58a3d')
ax4.set_title('Mean Gasoline Price resampled over quarter')
ax4.tick_params(axis='both', which='major')
ax5= fig.add_subplot(6,1,5, sharex=ax1)
ax5.plot(df.Gasoline.resample('A').mean(), linewidth=2, color='#e58a3d')
ax5.set_title('Mean Gasoline Price resampled over year')
ax5.tick_params(axis='both', which='major')
ax6= fig.add_subplot(6,1,6, sharex=ax1)
ax6.plot(df.Gasoline.resample('SM').mean(), linewidth=2, color='#e58a3d')
ax6.set_title('Mean Gasoline Price resampled over semi-month end')
ax6.tick_params(axis='both', which='major')
# Mean Jet Fuel price at six resampling frequencies; same layout as above.
fig= plt.figure(figsize=(18,16))
fig.subplots_adjust(hspace=.4)
ax1= fig.add_subplot(6,1,1)
ax1.plot(df['Jet Fuel'].resample('D').mean(), linewidth=2, color='#c24e5d')
ax1.set_title('Mean Jet Fuel Price resampled over day')
ax2= fig.add_subplot(6,1,2, sharex=ax1)
ax2.plot(df['Jet Fuel'].resample('W').mean(), linewidth=2, color='#c24e5d')
ax2.set_title('Mean Jet Fuel Price resampled over week')
ax2.tick_params(axis='both', which='major')
ax3= fig.add_subplot(6,1,3, sharex=ax1)
ax3.plot(df['Jet Fuel'].resample('M').mean(), linewidth=2, color='#c24e5d')
ax3.set_title('Mean Jet Fuel Price resampled over month')
ax3.tick_params(axis='both', which='major')
ax4= fig.add_subplot(6,1,4, sharex=ax1)
ax4.plot(df['Jet Fuel'].resample('Q').mean(), linewidth=2, color='#c24e5d')
ax4.set_title('Mean Jet Fuel Price resampled over quarter')
ax4.tick_params(axis='both', which='major')
ax5= fig.add_subplot(6,1,5, sharex=ax1)
ax5.plot(df['Jet Fuel'].resample('A').mean(), linewidth=2, color='#c24e5d')
ax5.set_title('Mean Jet Fuel Price resampled over year')
ax5.tick_params(axis='both', which='major')
ax6= fig.add_subplot(6,1,6, sharex=ax1)
ax6.plot(df['Jet Fuel'].resample('SM').mean(), linewidth=2, color='#c24e5d')
ax6.set_title('Mean Jet Fuel Price resampled over semi-month end')
ax6.tick_params(axis='both', which='major')
# Mean Diesel price grouped by calendar component (year/quarter/month/day).
plt.figure(figsize=(14,8))
plt.subplot(2,2,1)
df.groupby('year').Diesel.agg('mean').plot(color='#3f5b9c', linewidth=3)
plt.xlabel('')
plt.title('Mean Diesel Price by Year')
plt.subplot(2,2,2)
df.groupby('quarter').Diesel.agg('mean').plot(color='#3f5b9c', linewidth=3)
plt.xlabel('')
plt.title('Mean Diesel Price by Quarter')
plt.subplot(2,2,3)
df.groupby('month').Diesel.agg('mean').plot(color='#3f5b9c', linewidth=3)
plt.xlabel('')
plt.title('Mean Diesel Price by Month')
plt.subplot(2,2,4)
df.groupby('day').Diesel.agg('mean').plot(color='#3f5b9c', linewidth=3)
plt.xlabel('')
plt.title('Mean Diesel Price by Day');
# Same four views for Gasoline.
plt.figure(figsize=(14,8))
plt.subplot(2,2,1)
df.groupby('year').Gasoline.agg('mean').plot(color='#e58a3d', linewidth=3)
plt.xlabel('')
plt.title('Mean Gasoline Price by Year')
plt.subplot(2,2,2)
df.groupby('quarter').Gasoline.agg('mean').plot(color='#e58a3d', linewidth=3)
plt.xlabel('')
plt.title('Mean Gasoline Price by Quarter')
plt.subplot(2,2,3)
df.groupby('month').Gasoline.agg('mean').plot(color='#e58a3d', linewidth=3)
plt.xlabel('')
plt.title('Mean Gasoline Price by Month')
plt.subplot(2,2,4)
df.groupby('day').Gasoline.agg('mean').plot(color='#e58a3d', linewidth=3)
plt.xlabel('')
plt.title('Mean Gasoline Price by Day');
# Same four views for Jet Fuel.
plt.figure(figsize=(14,8))
plt.subplot(2,2,1)
df.groupby('year')['Jet Fuel'].agg('mean').plot(color='#c24e5d', linewidth=3)
plt.xlabel('')
plt.title('Mean Jet Fuel Price by Year')
plt.subplot(2,2,2)
df.groupby('quarter')['Jet Fuel'].agg('mean').plot(color='#c24e5d', linewidth=3)
plt.xlabel('')
plt.title('Mean Jet Fuel Price by Quarter')
plt.subplot(2,2,3)
df.groupby('month')['Jet Fuel'].agg('mean').plot(color='#c24e5d', linewidth=3)
plt.xlabel('')
plt.title('Mean Jet Fuel Price by Month')
plt.subplot(2,2,4)
df.groupby('day')['Jet Fuel'].agg('mean').plot(color='#c24e5d', linewidth=3)
plt.xlabel('')
plt.title('Mean Jet Fuel Price by Day');
# Normality check (D'Agostino-Pearson), summary statistics, distribution
# plot, kurtosis and skewness for each product -- identical output to the
# original three copy-pasted cells, expressed as one loop.
for series_name, series_color in (('Diesel', '#3f5b9c'),
                                  ('Gasoline', '#e58a3d'),
                                  ('Jet Fuel', '#c24e5d')):
    stat, p = stats.normaltest(df[series_name])
    print('Statistics = %.3f, p = %.3f' % (stat,p))
    alpha = 0.05
    if p > alpha:
        print('Data looks Gaussian (fail to reject H0)')
    else:
        print('Data does not look Gaussian (reject H0)')
    print(df[series_name].describe())
    sns.distplot(df[series_name], color=series_color)
    print('Kurtosis: {}'.format(stats.kurtosis(df[series_name])))
    print('Skewness: {}'.format(stats.skew(df[series_name])))
# Restore proper datetimes on the untouched backup copy for the next section.
df1['Date']= pd.to_datetime(df1['Date'])
df1.head()
def test_stationarity(timeseries):
    """Visual + statistical stationarity check for a price series.

    Plots the series with its 30-day rolling mean and std, then prints an
    augmented Dickey-Fuller test report (statistic, p-value, lags used,
    observations used, and the critical values).
    """
    rolling_mean = timeseries.rolling(window=30).mean()
    rolling_std = timeseries.rolling(window=30).std()
    plt.figure(figsize=(14,5))
    sns.despine(left=True)
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolling_mean, color='red', label='Rolling Mean')
    plt.plot(rolling_std, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show()
    print('<Results of Dickey-Fuller Test>')
    adf_result = adfuller(timeseries, autolag='AIC')
    report = pd.Series(
        adf_result[0:4],
        index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for crit_name, crit_value in adf_result[4].items():
        report['Critical Value (%s)' % crit_name] = crit_value
    print(report)
# ADF test on the raw (level) series: prices are typically non-stationary.
test_stationarity(df1['Diesel'])
test_stationarity(df1.Gasoline)
test_stationarity(df1['Jet Fuel'])
# Take explicit copies so assigning the diff_1 column below cannot raise
# pandas' SettingWithCopyWarning or silently alias df1 (fix: .copy() added).
diesel = df1[['Date','Diesel']].copy()
gasoline = df1[['Date','Gasoline']].copy()
jet = df1[['Date','Jet Fuel']].copy()
#transform to stationary: first difference removes the trend
diesel["diff_1"] = diesel["Diesel"].diff(periods=1)
diesel.head()
test_stationarity(diesel["diff_1"].dropna())
#transform to stationary
gasoline["diff_1"] = gasoline["Gasoline"].diff(periods=1)
gasoline.head()
test_stationarity(gasoline["diff_1"].dropna())
#transform to stationary
jet["diff_1"] = jet["Jet Fuel"].diff(periods=1)
jet.head()
test_stationarity(jet["diff_1"].dropna())
# --- Gasoline: prepare the differenced series for LSTM training ---
gasoline.set_index('Date', inplace=True)
gasoline.head()
gasoline1 = gasoline.dropna()  # drops the NaN introduced by diff(1)
print(gasoline1.shape)
gasoline1.head()
# Scale the differenced series to (0, 1) as a float32 column vector.
# (Fix: removed a discarded no-op np.reshape(...) expression.)
dataset = gasoline1.diff_1.values  # numpy.ndarray
dataset = dataset.astype('float32')
dataset = np.reshape(dataset, (-1, 1))
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
dataset  # compressed
len(dataset)
# train_size = int(len(dataset) * 0.80)
# test_size = len(dataset) - train_size
train_size = 2905  # hard-coded split point (~80%) -- TODO confirm against len(dataset)
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
print(train.shape, test.shape)
# Convert an array of values into a supervised-learning dataset matrix.
def create_dataset(dataset, look_back=1):
    """Slide a `look_back`-wide window over a single-column 2-D array.

    Returns (X, Y) where X[i] = dataset[i:i+look_back, 0] and
    Y[i] = dataset[i+look_back, 0] (the value one step past the window).
    """
    windows = []
    targets = []
    last_start = len(dataset) - look_back - 1
    for start in range(last_start):
        windows.append(dataset[start:start + look_back, 0])
        targets.append(dataset[start + look_back, 0])
    return np.array(windows), np.array(targets)
#reshape into X=t and Y=t+1
look_back = 30  # 30 prior days predict the next day
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test= create_dataset(test, look_back)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
X_test
#reshape input to be [samples, time steps, features]
# NOTE(review): the window is laid out as 1 time step of 30 features, not
# 30 time steps of 1 feature (the later raw-price model does the opposite)
# -- confirm this layout is intended.
X_train= np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0],1, X_test.shape[1]))
X_test
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
#creating layers
model = Sequential()
model.add(LSTM(200, input_shape=(X_train.shape[1], X_train.shape[2]))) # LSTM layer with 200 memory units
model.add(Dropout(0.2))
model.add(Dense(1)) # no activation applied (linear output for regression)
model.compile(loss='mean_squared_error', optimizer='adam')
# patience=20 equals epochs=20, so early stopping can never trigger here
history = model.fit(X_train, Y_train, epochs=20, batch_size=70, validation_data=(X_test, Y_test),
callbacks=[EarlyStopping(monitor='val_loss', patience=20)], verbose=1, shuffle=False)
# Training Phase
model.summary()
train_predict= model.predict(X_train)
test_predict= model.predict(X_test)
#invert predictions from the (0,1) scaled space back to differenced prices
train_predict= scaler.inverse_transform(train_predict)
Y_train=scaler.inverse_transform([Y_train])  # wrapped in a list -> shape (1, n)
test_predict= scaler.inverse_transform(test_predict)
Y_test= scaler.inverse_transform([Y_test])
# Errors on the differenced scale (not yet on the price scale).
print('Train Mean Absolute Error:', mean_absolute_error(Y_train[0], train_predict[:,0]))
print('Train Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_train[0], train_predict[:,0])))
print('Test Mean Absolute Error:', mean_absolute_error(Y_test[0], test_predict[:,0]))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_test[0], test_predict[:,0])))
print(train_predict.shape, test_predict.shape)
# Training vs validation loss per epoch.
plt.figure(figsize=(8,4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Test Loss')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(loc='upper right')
plt.show();
# Actual vs predicted (differenced scale) for the first 60 test steps.
aa=[x for x in range(60)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_test[0][:60], marker='.', label="actual")
plt.plot(aa, test_predict[:,0][:60], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
# Actual vs predicted for the first 300 training steps.
aa=[x for x in range(300)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_train[0][:300], marker='.', label="actual")
plt.plot(aa, train_predict[:,0][:300], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
from sklearn.metrics import r2_score
print('Train R2 Score: ', r2_score(Y_train[0], train_predict[:,0]))
print('Test R2 Score: ', r2_score(Y_test[0], test_predict[:,0]))
# !pip install pmdarima
from pmdarima.utils import diff_inv
def inv_diff(df_orig_column, df_diff_column, periods):
    """Invert a `periods`-lag first difference back to level values.

    Seeds the first `periods` entries from the original series, appends the
    differenced values, and lets pmdarima's diff_inv rebuild the levels.
    """
    seed = df_orig_column[:periods].tolist()
    diffs = df_diff_column[periods:].tolist()
    stacked = np.array(seed + diffs)
    return diff_inv(stacked, periods, 1)[periods:]
# --- Gasoline: invert the differencing and evaluate on the price scale ---
gasoline['r_diff'] = inv_diff(gasoline.Gasoline, gasoline.diff_1, 1)
gasoline.head()
# print(r_train.index.min(), r_train.index.max())
# print(r_train.index.min(), r_train.index.max())
r_test = gasoline.r_diff[train_size+2:-(look_back)]
len(r_test)
print(r_test.index.min(), r_test.index.max())
#convert test_predict to series
test_predict_s = pd.Series(test_predict.reshape(-1))
test_predict_s
#use def inv_diff to get inverse-diff of predicted test values
# NOTE(review): this seeds inv_diff with df['Gasoline'][train_size:], while
# the diesel/jet sections seed with r_test -- confirm which is intended.
r_predict = inv_diff(df['Gasoline'][train_size:], test_predict_s, 1)
r_predict_s = pd.Series(r_predict)
r_predict_s.index = r_test.index
print(r_test.index.min(), r_test.index.max())
# print(r_train.index.min(), r_train.index.max())
print(r_predict_s.index.min(), r_predict_s.index.max())
df['Gasoline'][:train_size].plot(figsize = (25,5), label='train', color='#e58a3d')
df['Gasoline'][train_size:].plot(figsize = (25,5), label='test', color='#e53d98')
r_predict_s.plot(figsize = (25,5), label='prediction', color='#e53d98', linestyle='--')
plt.title('LSTM Gasoline Results', size=24)
plt.legend()
plt.show()
# Bug fix: the original printed mean_absolute_error under MSE/RMSE labels.
print('Test MSE after Inverse Diff:', mean_squared_error(r_test, r_predict_s))
print('Test RMSE after Inverse Diff:',np.sqrt(mean_squared_error(r_test, r_predict_s)))
print('Test R2 Score after Inverse Diff: ', r2_score(r_test, r_predict_s))
lstm_gas = r_predict_s.copy()
len(lstm_gas)
# --- Diesel: prepare the differenced series for LSTM training ---
diesel.set_index('Date', inplace=True)
diesel.head()
diesel1 = diesel.dropna()  # drops the NaN introduced by diff(1)
print(diesel1.shape)
diesel1.head()
# Scale the differenced series to (0, 1) as a float32 column vector.
# (Fix: removed a discarded no-op np.reshape(...) expression.)
dataset = diesel1.diff_1.values  # numpy.ndarray
dataset = dataset.astype('float32')
dataset = np.reshape(dataset, (-1, 1))
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
dataset  # compressed
len(dataset)
# train_size = int(len(dataset) * 0.80)
# test_size = len(dataset) - train_size
train_size = 2905  # hard-coded split point (~80%) -- TODO confirm against len(dataset)
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
print(train.shape, test.shape)
# Convert an array of values into a supervised-learning dataset matrix.
def create_dataset(dataset, look_back=1):
    """Build (window, next-value) pairs from a single-column 2-D array."""
    n_samples = len(dataset) - look_back - 1
    X = [dataset[i:i + look_back, 0] for i in range(n_samples)]
    Y = [dataset[i + look_back, 0] for i in range(n_samples)]
    return np.array(X), np.array(Y)
#reshape into X=t and Y=t+1
look_back = 30  # 30 prior days predict the next day
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test= create_dataset(test, look_back)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
X_test
#reshape input to be [samples, time steps, features]
# NOTE(review): window laid out as 1 time step of 30 features -- confirm intended.
X_train= np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0],1, X_test.shape[1]))
X_test
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
#creating layers
model = Sequential()
model.add(LSTM(200, input_shape=(X_train.shape[1], X_train.shape[2]))) # LSTM layer with 200 memory units
model.add(Dropout(0.2))
model.add(Dense(1)) # no activation applied (linear output for regression)
model.compile(loss='mean_squared_error', optimizer='adam')
# patience=20 equals epochs=20, so early stopping can never trigger here
history = model.fit(X_train, Y_train, epochs=20, batch_size=70, validation_data=(X_test, Y_test),
callbacks=[EarlyStopping(monitor='val_loss', patience=20)], verbose=1, shuffle=False)
# Training Phase
model.summary()
train_predict= model.predict(X_train)
test_predict= model.predict(X_test)
#invert predictions from the (0,1) scaled space back to differenced prices
train_predict= scaler.inverse_transform(train_predict)
Y_train=scaler.inverse_transform([Y_train])  # wrapped in a list -> shape (1, n)
test_predict= scaler.inverse_transform(test_predict)
Y_test= scaler.inverse_transform([Y_test])
# Errors on the differenced scale (not yet on the price scale).
print('Train Mean Absolute Error:', mean_absolute_error(Y_train[0], train_predict[:,0]))
print('Train Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_train[0], train_predict[:,0])))
print('Test Mean Absolute Error:', mean_absolute_error(Y_test[0], test_predict[:,0]))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_test[0], test_predict[:,0])))
print(train_predict.shape, test_predict.shape)
# Training vs validation loss per epoch.
plt.figure(figsize=(8,4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Test Loss')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(loc='upper right')
plt.show();
# Actual vs predicted (differenced scale) for the first 60 test steps.
aa=[x for x in range(60)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_test[0][:60], marker='.', label="actual")
plt.plot(aa, test_predict[:,0][:60], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
# Actual vs predicted for the first 300 training steps.
aa=[x for x in range(300)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_train[0][:300], marker='.', label="actual")
plt.plot(aa, train_predict[:,0][:300], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
from sklearn.metrics import r2_score
print('Train R2 Score: ', r2_score(Y_train[0], train_predict[:,0]))
print('Test R2 Score: ', r2_score(Y_test[0], test_predict[:,0]))
# !pip install pmdarima
from pmdarima.utils import diff_inv
def inv_diff(df_orig_column, df_diff_column, periods):
    """Invert a `periods`-lag first difference back to level values."""
    # First `periods` entries come from the original series; the rest are
    # the differenced values. diff_inv then reconstructs the levels.
    head = np.asarray(df_orig_column[:periods].tolist())
    tail = np.asarray(df_diff_column[periods:].tolist())
    combined = np.concatenate([head, tail])
    restored = diff_inv(combined, periods, 1)
    return restored[periods:]
# --- Diesel: invert the differencing and evaluate on the price scale ---
diesel['r_diff'] = inv_diff(diesel.Diesel, diesel.diff_1, 1)
diesel.head()
r_train = diesel.r_diff[1:train_size+1]
len(r_train)
print(r_train.index.min(), r_train.index.max())  # (duplicate print removed)
r_test = diesel.r_diff[train_size+2:-(look_back)]
len(r_test)
print(r_test.index.min(), r_test.index.max())
#convert test_predict to series
test_predict_s = pd.Series(test_predict.reshape(-1))
test_predict_s
#use def inv_diff to get inverse-diff of predicted test values
r_predict = inv_diff(r_test, test_predict_s, 1)
r_predict_s = pd.Series(r_predict)
r_predict_s.index = r_test.index
print(r_test.index.min(), r_test.index.max())
print(r_train.index.min(), r_train.index.max())
print(r_predict_s.index.min(), r_predict_s.index.max())
df['Diesel'][:train_size].plot(figsize = (25,5), label='train', color='#3f5b9c')
df['Diesel'][train_size:].plot(figsize = (25,5), label='test', color='#9c3f5b')
r_predict_s.plot(figsize = (25,5), label='prediction', linestyle='--', color='#9c3f5b')
plt.title('LSTM Diesel Results', size=24)
plt.legend()
plt.show()
# Bug fix: the original printed mean_absolute_error under MSE/RMSE labels.
print('Test MSE after Inverse Diff:', mean_squared_error(r_test, r_predict_s))
print('Test RMSE after Inverse Diff:',np.sqrt(mean_squared_error(r_test, r_predict_s)))
print('Test R2 Score after Inverse Diff: ', r2_score(r_test, r_predict_s))
lstm_diesel = r_predict_s.copy()
len(lstm_diesel)
# --- Jet Fuel: prepare the differenced series for LSTM training ---
jet.set_index('Date', inplace=True)
jet.head()
jet1 = jet.dropna()  # drops the NaN introduced by diff(1)
print(jet1.shape)
jet1.head()
# Scale the differenced series to (0, 1) as a float32 column vector.
# (Fix: removed a discarded no-op np.reshape(...) expression.)
dataset = jet1.diff_1.values  # numpy.ndarray
dataset = dataset.astype('float32')
dataset = np.reshape(dataset, (-1, 1))
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
dataset  # compressed
len(dataset)
# train_size = int(len(dataset) * 0.80)
# test_size = len(dataset) - train_size
train_size = 2905  # hard-coded split point (~80%) -- TODO confirm against len(dataset)
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
print(train.shape, test.shape)
# Convert an array of values into a supervised-learning dataset matrix.
def create_dataset(dataset, look_back=1):
    """Return (X, Y): sliding windows of width `look_back` and the value
    immediately following each window (first column only)."""
    X, Y = [], []
    i = 0
    limit = len(dataset) - look_back - 1
    while i < limit:
        X.append(dataset[i:i + look_back, 0])
        Y.append(dataset[i + look_back, 0])
        i += 1
    return np.array(X), np.array(Y)
#reshape into X=t and Y=t+1
look_back = 30  # 30 prior days predict the next day
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test= create_dataset(test, look_back)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
X_test
#reshape input to be [samples, time steps, features]
# NOTE(review): window laid out as 1 time step of 30 features -- confirm intended.
X_train= np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0],1, X_test.shape[1]))
X_test
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
#creating layers
model = Sequential()
model.add(LSTM(200, input_shape=(X_train.shape[1], X_train.shape[2]))) # LSTM layer with 200 memory units
model.add(Dropout(0.2))
model.add(Dense(1)) # no activation applied (linear output for regression)
model.compile(loss='mean_squared_error', optimizer='adam')
# patience=20 equals epochs=20, so early stopping can never trigger here
history = model.fit(X_train, Y_train, epochs=20, batch_size=70, validation_data=(X_test, Y_test),
callbacks=[EarlyStopping(monitor='val_loss', patience=20)], verbose=1, shuffle=False)
# Training Phase
model.summary()
train_predict= model.predict(X_train)
test_predict= model.predict(X_test)
#invert predictions from the (0,1) scaled space back to differenced prices
train_predict= scaler.inverse_transform(train_predict)
Y_train=scaler.inverse_transform([Y_train])  # wrapped in a list -> shape (1, n)
test_predict= scaler.inverse_transform(test_predict)
Y_test= scaler.inverse_transform([Y_test])
# Errors on the differenced scale (not yet on the price scale).
print('Train Mean Absolute Error:', mean_absolute_error(Y_train[0], train_predict[:,0]))
print('Train Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_train[0], train_predict[:,0])))
print('Test Mean Absolute Error:', mean_absolute_error(Y_test[0], test_predict[:,0]))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_test[0], test_predict[:,0])))
print(train_predict.shape, test_predict.shape)
# Training vs validation loss per epoch.
plt.figure(figsize=(8,4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Test Loss')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(loc='upper right')
plt.show();
# Actual vs predicted (differenced scale) for the first 60 test steps.
aa=[x for x in range(60)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_test[0][:60], marker='.', label="actual")
plt.plot(aa, test_predict[:,0][:60], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
# Actual vs predicted for the first 300 training steps.
aa=[x for x in range(300)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_train[0][:300], marker='.', label="actual")
plt.plot(aa, train_predict[:,0][:300], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
from sklearn.metrics import r2_score
print('Train R2 Score: ', r2_score(Y_train[0], train_predict[:,0]))
print('Test R2 Score: ', r2_score(Y_test[0], test_predict[:,0]))
# !pip install pmdarima
from pmdarima.utils import diff_inv
def inv_diff(df_orig_column, df_diff_column, periods):
    """Undo a `periods`-lag first difference, returning level values."""
    # Splice the first `periods` original values in front of the diffs,
    # then let pmdarima's diff_inv rebuild the cumulative levels.
    spliced = df_orig_column[:periods].tolist() + df_diff_column[periods:].tolist()
    levels = diff_inv(np.array(spliced), periods, 1)
    return levels[periods:]
# --- Jet Fuel: invert the differencing and evaluate on the price scale ---
jet['r_diff'] = inv_diff(jet['Jet Fuel'], jet.diff_1, 1)
jet.head()
r_train = jet.r_diff[1:train_size+1]
len(r_train)
print(r_train.index.min(), r_train.index.max())  # (duplicate print removed)
r_test = jet.r_diff[train_size+2:-(look_back)]
len(r_test)
print(r_test.index.min(), r_test.index.max())
#convert test_predict to series
test_predict_s = pd.Series(test_predict.reshape(-1))
test_predict_s
#use def inv_diff to get inverse-diff of predicted test values
r_predict = inv_diff(r_test, test_predict_s, 1)
r_predict_s = pd.Series(r_predict)
r_predict_s.index = r_test.index
print(r_test.index.min(), r_test.index.max())
print(r_train.index.min(), r_train.index.max())
print(r_predict_s.index.min(), r_predict_s.index.max())
df['Jet Fuel'][:train_size].plot(figsize = (25,5), label='train', color='#c24e5d')
df['Jet Fuel'][train_size:].plot(figsize = (25,5), label='test', color='#5dc24e')
r_predict_s.plot(figsize = (25,5), label='prediction', linestyle='--', color='#5dc24e')
plt.title('LSTM Jet Fuel Results', size=24)
plt.legend()
plt.show()
# Bug fix: the original printed mean_absolute_error under MSE/RMSE labels.
print('Test MSE after Inverse Diff:', mean_squared_error(r_test, r_predict_s))
print('Test RMSE after Inverse Diff:',np.sqrt(mean_squared_error(r_test, r_predict_s)))
print('Test R2 Score after Inverse Diff: ', r2_score(r_test, r_predict_s))
lstm_jet = r_predict_s.copy()
len(lstm_jet)
# --- Second gasoline model: raw (undifferenced) prices with a stacked LSTM ---
# gasoline.set_index('Date', inplace=True)
gasoline.head()
# NOTE(review): df1 is re-bound here from the backup DataFrame to a Series
# (then to a scaled array); earlier df1-based cells cannot re-run after this.
df1=gasoline.Gasoline
df1
scaler= MinMaxScaler(feature_range=(0,1))
df1=scaler.fit_transform(np.array(df1).reshape(-1,1))
df1 #compressed
len(df1)
# train_size = int(len(df1) * 0.80)
# test_size = len(df1) - train_size
train_size = 2906  # hard-coded split; one larger than the differenced runs -- TODO confirm
train, test = df1[0:train_size,:], df1[train_size:len(df1),:]
print(train.shape, test.shape)
# Convert an array of values into a supervised-learning dataset matrix.
def create_dataset(dataset, look_back=1):
    """Pair each `look_back`-step window with the value one step ahead."""
    series = dataset[:, 0]
    pairs = [(series[i:i + look_back], series[i + look_back])
             for i in range(len(series) - look_back - 1)]
    X = np.array([window for window, _ in pairs])
    Y = np.array([target for _, target in pairs])
    return X, Y
#reshape into X=t and Y=t+1
look_back = 30  # 30 prior days predict the next day
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test= create_dataset(test, look_back)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
# reshape input to be [samples, time steps, features] which is required for LSTM
# (here the window is 30 time steps of 1 feature, unlike the earlier models)
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
# Stacked LSTM: three 100-unit layers over a (30 time steps, 1 feature) input.
model=Sequential()
model.add(LSTM(100,return_sequences=True,input_shape=(30,1)))
model.add(LSTM(100,return_sequences=True))
model.add(LSTM(100))
model.add(Dense(1))
model.compile(loss='mean_squared_error',optimizer='adam')
model.summary()
# patience=20 equals epochs=20, so early stopping can never trigger here
history = model.fit(X_train, Y_train, epochs=20, batch_size=70, validation_data=(X_test, Y_test),
callbacks=[EarlyStopping(monitor='val_loss', patience=20)], verbose=1, shuffle=False)
train_predict= model.predict(X_train)
test_predict= model.predict(X_test)
#invert predictions from the (0,1) scaled space back to price levels
train_predict= scaler.inverse_transform(train_predict)
Y_train=scaler.inverse_transform([Y_train])  # wrapped in a list -> shape (1, n)
test_predict= scaler.inverse_transform(test_predict)
Y_test= scaler.inverse_transform([Y_test])
print('Train Mean Absolute Error:', mean_absolute_error(Y_train[0], train_predict[:,0]))
print('Train Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_train[0], train_predict[:,0])))
print('Test Mean Absolute Error:', mean_absolute_error(Y_test[0], test_predict[:,0]))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_test[0], test_predict[:,0])))
plt.figure(figsize=(8,4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Test Loss')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(loc='upper right')
plt.show();
aa=[x for x in range(60)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_test[0][:60], marker='.', label="actual")
plt.plot(aa, test_predict[:,0][:60], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
aa=[x for x in range(300)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_train[0][:300], marker='.', label="actual")
plt.plot(aa, train_predict[:,0][:300], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
from sklearn.metrics import r2_score
print('Train R2 Score: ', r2_score(Y_train[0], train_predict[:,0]))
print('Test R2 Score: ', r2_score(Y_test[0], test_predict[:,0]))
len(test)
# Seed the recursive forecaster with the trailing window of the test set.
# NOTE(review): the hard-coded 70 assumes len(test) == 100 so this keeps the
# last 30 observations — confirm against the actual split size.
x_input=test[70:].reshape(1,-1)
x_input.shape
temp_input=list(x_input)
temp_input=temp_input[0].tolist()
# demonstrate prediction for the next 30 days: each prediction is appended to
# the rolling window (oldest value dropped), so later steps feed on model output
from numpy import array
lst_output=[]
n_steps=30
i=0
while(i<30):
    if(len(temp_input)>30):
        #print(temp_input)
        x_input=np.array(temp_input[1:])
        print("{} day input {}".format(i,x_input))
        x_input=x_input.reshape(1,-1)
        x_input = x_input.reshape((1, n_steps, 1))
        #print(x_input)
        yhat = model.predict(x_input, verbose=0)
        print("{} day output {}".format(i,yhat))
        temp_input.extend(yhat[0].tolist())
        temp_input=temp_input[1:]
        #print(temp_input)
        lst_output.extend(yhat.tolist())
        i=i+1
    else:
        # first iteration: window is exactly n_steps long, use it as-is
        x_input = x_input.reshape((1, n_steps,1))
        yhat = model.predict(x_input, verbose=0)
        print(yhat[0])
        temp_input.extend(yhat[0].tolist())
        print(len(temp_input))
        lst_output.extend(yhat.tolist())
        i=i+1
print(lst_output)
# NOTE(review): `pred` is built but never used afterwards.
pred = pd.Series( (v[0] for v in lst_output) )
day_new=np.arange(1,31)
day_pred=np.arange(31,61)
len(df1)
# NOTE(review): the hard-coded 2976 assumes len(df1) == 3006 (last 30 rows) — confirm.
plt.plot(day_new,scaler.inverse_transform(df1[2976:]))
plt.plot(day_pred,scaler.inverse_transform(lst_output))
print(Y_test.shape, Y_train.shape, test_predict.shape)
gasoline.index[:train_size-31]
test_predict.shape
# Overlay the test-window predictions on the full price history; the last 69
# dates correspond to the len(test)-look_back-1 windowed test predictions.
plt.figure(figsize=(25,5))
plt.plot(df['Gasoline'][:train_size], label='train', color='#e58a3d')
plt.plot(df['Gasoline'][train_size:], label='test', color='#e53d98')
plt.plot(gasoline.index[-69:], test_predict[:,0], label='prediction', color='#e53d98', linestyle='--')
plt.title('Stacked LSTM Gasoline Results', size=24)
plt.legend()
plt.show()
# Keep a copy for the final all-model comparison plots.
stlstm_gas = test_predict[:,0].copy()
len(stlstm_gas)
# --- Stacked LSTM: Diesel ------------------------------------------------
diesel.head()
df1=diesel.Diesel
df1
# Fit a fresh scaler on the diesel series (do not reuse the gasoline scaler).
scaler= MinMaxScaler(feature_range=(0,1))
df1=scaler.fit_transform(np.array(df1).reshape(-1,1))
df1 #compressed
len(df1)
# Same fixed chronological split as the gasoline model.
# train_size = int(len(df1) * 0.80)
# test_size = len(df1) - train_size
train_size = 2906
train, test = df1[0:train_size,:], df1[train_size:len(df1),:]
print(train.shape, test.shape)
#convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Turn a (n, 1) scaled series into sliding-window training samples.

    Each sample is `look_back` consecutive values from column 0; its
    target is the value immediately following that window.
    """
    samples = []
    targets = []
    last_start = len(dataset) - look_back - 1
    for start in range(last_start):
        samples.append(dataset[start:start + look_back, 0])
        targets.append(dataset[start + look_back, 0])
    return np.array(samples), np.array(targets)
#reshape into X=t and Y=t+1 — same 30-step windowing as the gasoline model
look_back = 30
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test= create_dataset(test, look_back)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
# reshape input to be [samples, time steps, features] which is required for LSTM
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
# Identical stacked 3x100-unit LSTM architecture, retrained for Diesel.
model=Sequential()
model.add(LSTM(100,return_sequences=True,input_shape=(30,1)))
model.add(LSTM(100,return_sequences=True))
model.add(LSTM(100))
model.add(Dense(1))
model.compile(loss='mean_squared_error',optimizer='adam')
model.summary()
history = model.fit(X_train, Y_train, epochs=20, batch_size=70, validation_data=(X_test, Y_test),
callbacks=[EarlyStopping(monitor='val_loss', patience=20)], verbose=1, shuffle=False)
train_predict= model.predict(X_train)
test_predict= model.predict(X_test)
#invert predictions back to the original price scale
train_predict= scaler.inverse_transform(train_predict)
Y_train=scaler.inverse_transform([Y_train])
test_predict= scaler.inverse_transform(test_predict)
Y_test= scaler.inverse_transform([Y_test])
print('Train Mean Absolute Error:', mean_absolute_error(Y_train[0], train_predict[:,0]))
print('Train Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_train[0], train_predict[:,0])))
print('Test Mean Absolute Error:', mean_absolute_error(Y_test[0], test_predict[:,0]))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_test[0], test_predict[:,0])))
# train/validation loss curves
plt.figure(figsize=(8,4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Test Loss')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(loc='upper right')
plt.show();
# first 60 test-set predictions vs. actuals
aa=[x for x in range(60)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_test[0][:60], marker='.', label="actual")
plt.plot(aa, test_predict[:,0][:60], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
# first 300 train-set predictions vs. actuals
aa=[x for x in range(300)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_train[0][:300], marker='.', label="actual")
plt.plot(aa, train_predict[:,0][:300], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
from sklearn.metrics import r2_score
print('Train R2 Score: ', r2_score(Y_train[0], train_predict[:,0]))
print('Test R2 Score: ', r2_score(Y_test[0], test_predict[:,0]))
len(test)
# Recursive 30-day forecast (same scheme as the gasoline model): seed with the
# trailing observed window, then feed each prediction back into the input.
x_input=test[70:].reshape(1,-1)
x_input.shape
temp_input=list(x_input)
temp_input=temp_input[0].tolist()
# demonstrate prediction for the next 30 days
from numpy import array
lst_output=[]
n_steps=30
i=0
while(i<30):
    if(len(temp_input)>30):
        #print(temp_input)
        x_input=np.array(temp_input[1:])
        print("{} day input {}".format(i,x_input))
        x_input=x_input.reshape(1,-1)
        x_input = x_input.reshape((1, n_steps, 1))
        #print(x_input)
        yhat = model.predict(x_input, verbose=0)
        print("{} day output {}".format(i,yhat))
        temp_input.extend(yhat[0].tolist())
        temp_input=temp_input[1:]
        #print(temp_input)
        lst_output.extend(yhat.tolist())
        i=i+1
    else:
        # first iteration: window is exactly n_steps long, use it as-is
        x_input = x_input.reshape((1, n_steps,1))
        yhat = model.predict(x_input, verbose=0)
        print(yhat[0])
        temp_input.extend(yhat[0].tolist())
        print(len(temp_input))
        lst_output.extend(yhat.tolist())
        i=i+1
# print(lst_output)
day_new=np.arange(1,31)
day_pred=np.arange(31,61)
len(df1)
# NOTE(review): the hard-coded 2976 assumes len(df1) == 3006 (last 30 rows) — confirm.
plt.plot(day_new,scaler.inverse_transform(df1[2976:]))
plt.plot(day_pred,scaler.inverse_transform(lst_output))
print(Y_test.shape, Y_train.shape, test_predict.shape)
diesel.index[:train_size-31]
test_predict.shape
# Overlay test-window predictions on the full price history.
plt.figure(figsize=(25,5))
plt.plot(df['Diesel'][:train_size], label='train', color='#3f5b9c')
plt.plot(df['Diesel'][train_size:], label='test', color='#9c3f5b')
# NOTE(review): x-axis dates are taken from gasoline.index — presumably the
# diesel series shares the same calendar; verify.
plt.plot(gasoline.index[-69:], test_predict[:,0], label='prediction', color='#9c3f5b', linestyle='--')
plt.title('Stacked LSTM Diesel Results', size=24)
plt.legend()
plt.show()
# Keep a copy for the final all-model comparison plots.
stlstm_diesel = test_predict[:,0].copy()
len(stlstm_diesel)
# --- Stacked LSTM: Jet Fuel ----------------------------------------------
jet.head()
df1=jet['Jet Fuel']
df1
# Fit a fresh scaler on the jet-fuel series.
scaler= MinMaxScaler(feature_range=(0,1))
df1=scaler.fit_transform(np.array(df1).reshape(-1,1))
df1 #compressed
len(df1)
# Same fixed chronological split as the other fuels.
# train_size = int(len(df1) * 0.80)
# test_size = len(df1) - train_size
train_size=2906
train, test = df1[0:train_size,:], df1[train_size:len(df1),:]
print(train.shape, test.shape)
#convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Produce (X, Y) sliding-window pairs from a (n, 1) array.

    X rows are length-`look_back` slices of column 0; Y holds the value
    that immediately follows each slice.
    """
    count = len(dataset) - look_back - 1
    X = np.array([dataset[j:j + look_back, 0] for j in range(count)])
    Y = np.array([dataset[j + look_back, 0] for j in range(count)])
    return X, Y
#reshape into X=t and Y=t+1 — same 30-step windowing as the other fuels
look_back = 30
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test= create_dataset(test, look_back)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
# reshape input to be [samples, time steps, features] which is required for LSTM
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
# Identical stacked 3x100-unit LSTM architecture, retrained for Jet Fuel.
model=Sequential()
model.add(LSTM(100,return_sequences=True,input_shape=(30,1)))
model.add(LSTM(100,return_sequences=True))
model.add(LSTM(100))
model.add(Dense(1))
model.compile(loss='mean_squared_error',optimizer='adam')
model.summary()
history = model.fit(X_train, Y_train, epochs=20, batch_size=70, validation_data=(X_test, Y_test),
callbacks=[EarlyStopping(monitor='val_loss', patience=20)], verbose=1, shuffle=False)
train_predict= model.predict(X_train)
test_predict= model.predict(X_test)
#invert predictions back to the original price scale
train_predict= scaler.inverse_transform(train_predict)
Y_train=scaler.inverse_transform([Y_train])
test_predict= scaler.inverse_transform(test_predict)
Y_test= scaler.inverse_transform([Y_test])
print('Train Mean Absolute Error:', mean_absolute_error(Y_train[0], train_predict[:,0]))
print('Train Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_train[0], train_predict[:,0])))
print('Test Mean Absolute Error:', mean_absolute_error(Y_test[0], test_predict[:,0]))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(Y_test[0], test_predict[:,0])))
# train/validation loss curves
plt.figure(figsize=(8,4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Test Loss')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(loc='upper right')
plt.show();
# first 60 test-set predictions vs. actuals
aa=[x for x in range(60)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_test[0][:60], marker='.', label="actual")
plt.plot(aa, test_predict[:,0][:60], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
# first 300 train-set predictions vs. actuals
aa=[x for x in range(300)]
plt.figure(figsize=(8,4))
plt.plot(aa, Y_train[0][:300], marker='.', label="actual")
plt.plot(aa, train_predict[:,0][:300], 'r', label="prediction")
# plt.tick_params(left=False, labelleft=True) #remove ticks
plt.tight_layout()
sns.despine(top=True)
plt.subplots_adjust(left=0.07)
plt.ylabel('Price', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show();
from sklearn.metrics import r2_score
print('Train R2 Score: ', r2_score(Y_train[0], train_predict[:,0]))
print('Test R2 Score: ', r2_score(Y_test[0], test_predict[:,0]))
len(test)
# Recursive 30-day forecast, same scheme as the other fuels.
x_input=test[70:].reshape(1,-1)
x_input.shape
temp_input=list(x_input)
temp_input=temp_input[0].tolist()
# demonstrate prediction for the next 30 days
from numpy import array
lst_output=[]
n_steps=30
i=0
while(i<30):
    if(len(temp_input)>30):
        #print(temp_input)
        x_input=np.array(temp_input[1:])
        print("{} day input {}".format(i,x_input))
        x_input=x_input.reshape(1,-1)
        x_input = x_input.reshape((1, n_steps, 1))
        #print(x_input)
        yhat = model.predict(x_input, verbose=0)
        print("{} day output {}".format(i,yhat))
        temp_input.extend(yhat[0].tolist())
        temp_input=temp_input[1:]
        #print(temp_input)
        lst_output.extend(yhat.tolist())
        i=i+1
    else:
        # first iteration: window is exactly n_steps long, use it as-is
        x_input = x_input.reshape((1, n_steps,1))
        yhat = model.predict(x_input, verbose=0)
        print(yhat[0])
        temp_input.extend(yhat[0].tolist())
        print(len(temp_input))
        lst_output.extend(yhat.tolist())
        i=i+1
# print(lst_output)
day_new=np.arange(1,31)
day_pred=np.arange(31,61)
len(df1)
# NOTE(review): the hard-coded 2976 assumes len(df1) == 3006 (last 30 rows) — confirm.
plt.plot(day_new,scaler.inverse_transform(df1[2976:]))
plt.plot(day_pred,scaler.inverse_transform(lst_output))
print(Y_test.shape, Y_train.shape, test_predict.shape)
# NOTE(review): this references diesel.index inside the jet-fuel section —
# presumably all series share one calendar; verify.
diesel.index[:train_size-31]
test_predict.shape
# Overlay test-window predictions on the full price history.
plt.figure(figsize=(25,5))
plt.plot(df['Jet Fuel'][:train_size], label='train', color='#c24e5d')
plt.plot(df['Jet Fuel'][train_size:], label='test', color='#5dc24e')
plt.plot(gasoline.index[-69:], test_predict[:,0], label='prediction', color='#5dc24e', linestyle='--')
plt.title('Stacked LSTM Jet Results', size=24)
plt.legend()
plt.show()
# Keep a copy for the final all-model comparison plots.
stlstm_jet = test_predict[:,0].copy()
len(stlstm_jet)
# load required libraries
import sys
import statsmodels as ss
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
# --- ARIMA: Gasoline -----------------------------------------------------
gasoline.head()
gas = gasoline['Gasoline']
gas.head()
# test_stationarity is defined earlier in the notebook (ADF test + rolling stats).
test_stationarity(gas)
#log transform to stabilise the variance
gas_log = np.log(gas)
gas_log.head()
test_stationarity(gas_log)
#transform to stationary: first difference of the log series
gas_log_diff = gas_log.diff(periods=1)
gas_log_diff
gas_log_diff.dropna(inplace=True)
test_stationarity(gas_log_diff)
len(gas_log_diff)
# Differencing drops one row, hence 2905 rather than the 2906 used for the LSTMs.
# train_size = int(len(gas_log_diff) * 0.80)
train_size = 2905
train = gas_log_diff[:train_size]
test = gas_log_diff[train_size:]
print("Train shape: " + str(len(train)))
print("Test shape: " + str(len(test)))
print(train.head(3), print(test.head(3)))
from statsmodels.graphics.tsaplots import plot_pacf
title = 'PACF: Log+Diff Gasoline'
lags = 40
plot_pacf(train, title=title, lags=lags, zero=False, method=("ols"));
from statsmodels.graphics.tsaplots import plot_acf
# plot ACF of the log-differenced gasoline series
title = 'ACF: Log+Diff Gasoline'
lags = 40
plot_acf(train, title=title, lags=lags, zero=False);
from statsmodels.tsa.arima.model import ARIMA
# fitting a simple ARIMA(1,1,1) model as a baseline
model = ARIMA(train, order=(1,1,1))
model_result = model.fit()
model_result.summary()
from sklearn.metrics import mean_squared_error
# evaluate baseline model MSE and RMSE on the hold-out period (static forecasts)
start = len(train)
end = len(train) + len(test) - 1
arima111_preds = model_result.predict(start=start, end=end, dynamic=False).rename('ARIMA111 Predictions')
arima111_mse = mean_squared_error(test, arima111_preds)
print("ARIMA(1,1,1) MSE: " + str(arima111_mse))
print("ARIMA(1,1,1) RMSE: " + str(np.sqrt(arima111_mse)))
# use auto_arima to automate ARIMA model selection
from pmdarima.arima import auto_arima
model_auto = auto_arima(
    train,
    exogenous=None, # don't use exogenous variables
    m=1, # don't add seasonal cycles as I'm interested only in an ARIMA model
    max_order=None, # don't limit total number of non-seasonal AR and MA components
    max_p=15, # use maximum of 15 lags for AR and MA components as implied by the ACF plot of the ARIMA(1,1,1) residuals
    max_q=15,
    max_d=2, # use max integration of 2 to reach stationarity
    maxiter=100,
    alpha=0.05, # use 0.05 level of significance
    njobs=-1, # fit as many as possible models
    trend="ct",
    start_P=0, # don't use seasonal components as I'm interested only in an ARIMA model
    start_Q=0,
    start_D=0,
    max_P=0,
    max_Q=0,
    max_D=0)
model_auto.summary()
# Refit the auto_arima-selected order and evaluate it on the hold-out set.
model_auto = ARIMA(train, order=(2,0,1))
model_auto_result = model_auto.fit()
model_auto_result.summary()
# evaluate model MSE and RMSE
start = len(train)
end = len(train) + len(test) - 1
arima201_preds = model_auto_result.predict(start=start, end=end, dynamic=False).rename('ARIMA201 Predictions')
arima201_mse = mean_squared_error(test, arima201_preds)
# BUG FIX: these scores are for ARIMA(2,0,1); the original printed them
# under a copy-pasted "ARIMA(1,1,1)" label.
print("ARIMA(2,0,1) MSE: " + str(arima201_mse))
print("ARIMA(2,0,1) RMSE: " + str(np.sqrt(arima201_mse)))
arima111_preds.head()
arima201_preds.head()
#return the predicted values to reverse diff and reverse log to match the original dataset
arima201_r_diff = inv_diff(gas_log[train_size:], arima201_preds, 1)
arima201_r_diff
# undo the log transform to get back to price level
arima201_r_diff_exp = np.exp(arima201_r_diff)
arima201_r_diff_exp
len(arima201_r_diff_exp)
plt.figure(figsize=(25,5))
plt.plot(gasoline['Gasoline'][:train_size], label='train', color='#e58a3d')
plt.plot(gasoline['Gasoline'][train_size:], label='test', color='#e53d98')
plt.plot(gasoline.index[-100:], arima201_r_diff_exp, label='arima201', color='#e53d98', linestyle='--')
plt.title('ARIMA(2,0,1) Gasoline Results Inverse Diff+Exp', size=15)
plt.legend()
plt.show()
arima_gas = arima201_r_diff_exp
len(arima_gas)
# BUG FIX: RMSE is sqrt(MSE); the original took sqrt of the MAE.
# NOTE(review): `test` is still the log-differenced series while `arima_gas`
# is back on the price scale, so these scores mix scales — they should likely
# be computed against gasoline['Gasoline'][train_size:]; confirm intent.
print('Test Mean Absolute Error:', mean_absolute_error(test, arima_gas))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(test, arima_gas)))
print('Test R2 Score: ', r2_score(test, arima_gas))
# --- ARIMA: Diesel -------------------------------------------------------
diesel.head()
# NOTE: this rebinds `diesel` from a DataFrame to its 'Diesel' Series.
diesel = diesel['Diesel']
diesel.head()
test_stationarity(diesel)
#log transform to stabilise the variance
d_log = np.log(diesel)
d_log.head()
test_stationarity(d_log)
#transform to stationary via first differencing
d_log_diff = d_log.diff(periods=1)
d_log_diff
d_log_diff.dropna(inplace=True)
test_stationarity(d_log_diff)
len(d_log_diff)
# train_size = int(len(d_log_diff) * 0.80)
train_size = 2905
train = d_log_diff[:train_size]
test = d_log_diff[train_size:]
print("Train shape: " + str(len(train)))
print("Test shape: " + str(len(test)))
print(train.head(3), print(test.head(3)))
from statsmodels.graphics.tsaplots import plot_pacf
title = 'PACF: Log+Diff Diesel'
lags = 40
plot_pacf(train, title=title, lags=lags, zero=False, method=("ols"));
from statsmodels.graphics.tsaplots import plot_acf
title = 'ACF: Log+Diff Diesel'
lags = 40
plot_acf(train, title=title, lags=lags, zero=False);
from statsmodels.tsa.arima.model import ARIMA
# fitting a simple ARIMA(1,1,1) model as a baseline
model = ARIMA(train, order=(1,1,1))
model_result = model.fit()
model_result.summary()
from sklearn.metrics import mean_squared_error
# evaluate baseline model MSE and RMSE on the hold-out period (static forecasts)
start = len(train)
end = len(train) + len(test) - 1
arima111_preds = model_result.predict(start=start, end=end, dynamic=False).rename('ARIMA111 Predictions')
arima111_mse = mean_squared_error(test, arima111_preds)
print("ARIMA(1,1,1) MSE: " + str(arima111_mse))
print("ARIMA(1,1,1) RMSE: " + str(np.sqrt(arima111_mse)))
# use auto_arima to automate ARIMA model selection
from pmdarima.arima import auto_arima
model_auto = auto_arima(
    train,
    exogenous=None, # don't use exogenous variables
    m=1, # don't add seasonal cycles as I'm interested only in an ARIMA model
    max_order=None, # don't limit total number of non-seasonal AR and MA components
    max_p=15, # use maximum of 15 lags for AR and MA components as implied by the ACF plot of the ARIMA(1,1,1) residuals
    max_q=15,
    max_d=2, # use max integration of 2 to reach stationarity
    maxiter=100,
    alpha=0.05, # use 0.05 level of significance
    njobs=-1, # fit as many as possible models
    trend="ct",
    start_P=0, # don't use seasonal components as I'm interested only in an ARIMA model
    start_Q=0,
    start_D=0,
    max_P=0,
    max_Q=0,
    max_D=0)
model_auto.summary()
# Refit the auto_arima-selected order and evaluate it on the hold-out set.
model_auto = ARIMA(train, order=(3,0,7))
model_auto_result = model_auto.fit()
model_auto_result.summary()
# evaluate model MSE and RMSE
start = len(train)
end = len(train) + len(test) - 1
arima307_preds = model_auto_result.predict(start=start, end=end, dynamic=False).rename('ARIMA307 Predictions')
arima307_mse = mean_squared_error(test, arima307_preds)
# BUG FIX: these scores are for ARIMA(3,0,7); the original printed them
# under a copy-pasted "ARIMA(1,1,1)" label.
print("ARIMA(3,0,7) MSE: " + str(arima307_mse))
print("ARIMA(3,0,7) RMSE: " + str(np.sqrt(arima307_mse)))
arima307_preds.head()
arima111_preds.head()
#return the predicted values to reverse diff and reverse log to match the original dataset
# BUG FIX: the original anchored the inverse differencing on gas_log (copied
# from the gasoline section); the diesel predictions must be anchored on d_log.
arima307_r_diff = inv_diff(d_log[train_size:], arima307_preds, 1)
arima307_r_diff
# undo the log transform to get back to price level
arima307_r_diff_exp = np.exp(arima307_r_diff)
arima307_r_diff_exp
len(arima307_r_diff_exp)
plt.figure(figsize=(25,5))
plt.plot(df['Diesel'][:train_size], label='train', color='#3f5b9c')
plt.plot(df['Diesel'][train_size:], label='test', color='#9c3f5b')
plt.plot(diesel.index[-100:], arima307_r_diff_exp, label='arima307', color='#9c3f5b', linestyle='--')
plt.title('ARIMA(3,0,7) Diesel Results Inverse Diff+Exp', size=15)
plt.legend()
plt.show()
arima_diesel = arima307_r_diff_exp
len(arima_diesel)
# BUG FIX: RMSE is sqrt(MSE); the original took sqrt of the MAE.
# NOTE(review): `test` is log-differenced while `arima_diesel` is on the price
# scale — these scores mix scales; consider df['Diesel'][train_size:] instead.
print('Test Mean Absolute Error:', mean_absolute_error(test, arima_diesel))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(test, arima_diesel)))
print('Test R2 Score: ', r2_score(test, arima_diesel))
# --- ARIMA: Jet Fuel -----------------------------------------------------
jet.head()
# NOTE: this rebinds `jet` from a DataFrame to its 'Jet Fuel' Series.
jet = jet['Jet Fuel']
jet.head()
test_stationarity(jet)
#log transform to stabilise the variance
j_log = np.log(jet)
j_log.head()
test_stationarity(j_log)
#transform to stationary via first differencing
j_log_diff = j_log.diff(periods=1)
j_log_diff
j_log_diff.dropna(inplace=True)
test_stationarity(j_log_diff)
len(j_log_diff)
# train_size = int(len(d_log_diff) * 0.80)
train_size = 2905
train = j_log_diff[:train_size]
test = j_log_diff[train_size:]
print("Train shape: " + str(len(train)))
print("Test shape: " + str(len(test)))
print(train.head(3), print(test.head(3)))
from statsmodels.graphics.tsaplots import plot_pacf
title = 'PACF: Log+Diff Jet Fuel'
lags = 40
plot_pacf(train, title=title, lags=lags, zero=False, method=("ols"));
from statsmodels.graphics.tsaplots import plot_acf
title = 'ACF: Log+Diff Jet Fuel'
lags = 40
plot_acf(train, title=title, lags=lags, zero=False);
from statsmodels.tsa.arima.model import ARIMA
# fitting a simple ARIMA(1,1,1) model as a baseline
model = ARIMA(train, order=(1,1,1))
model_result = model.fit()
model_result.summary()
from sklearn.metrics import mean_squared_error
# evaluate baseline model MSE and RMSE on the hold-out period (static forecasts)
start = len(train)
end = len(train) + len(test) - 1
arima111_preds = model_result.predict(start=start, end=end, dynamic=False).rename('ARIMA111 Predictions')
arima111_mse = mean_squared_error(test, arima111_preds)
print("ARIMA(1,1,1) MSE: " + str(arima111_mse))
print("ARIMA(1,1,1) RMSE: " + str(np.sqrt(arima111_mse)))
# use auto_arima to automate ARIMA model selection
from pmdarima.arima import auto_arima
model_auto = auto_arima(
    train,
    exogenous=None, # don't use exogenous variables
    m=1, # don't add seasonal cycles as I'm interested only in an ARIMA model
    max_order=None, # don't limit total number of non-seasonal AR and MA components
    max_p=15, # use maximum of 15 lags for AR and MA components as implied by the ACF plot of the ARIMA(1,1,1) residuals
    max_q=15,
    max_d=2, # use max integration of 2 to reach stationarity
    maxiter=100,
    alpha=0.05, # use 0.05 level of significance
    njobs=-1, # fit as many as possible models
    trend="ct",
    start_P=0, # don't use seasonal components as I'm interested only in an ARIMA model
    start_Q=0,
    start_D=0,
    max_P=0,
    max_Q=0,
    max_D=0)
model_auto.summary()
# Refit the auto_arima-selected ARIMA(0,0,3) and evaluate it on the hold-out set.
model_auto = ARIMA(train, order=(0,0,3))
model_auto_result = model_auto.fit()
model_auto_result.summary()
# evaluate model MSE and RMSE
start = len(train)
end = len(train) + len(test) - 1
arima003_preds = model_auto_result.predict(start=start, end=end, dynamic=False).rename('ARIMA003 Predictions')
arima003_mse = mean_squared_error(test, arima003_preds)
print("ARIMA(0,0,3) MSE: " + str(arima003_mse))
print("ARIMA(0,0,3) RMSE: " + str(np.sqrt(arima003_mse)))
arima003_preds.head()
arima111_preds.head()
#return the predicted values to reverse diff and reverse log to match the original dataset
arima003_r_diff = inv_diff(j_log[train_size:], arima003_preds, 1)
arima003_r_diff
# undo the log transform to get back to price level
arima003_r_diff_exp = np.exp(arima003_r_diff)
arima003_r_diff_exp
len(arima003_r_diff_exp)
plt.figure(figsize=(25,5))
plt.plot(df['Jet Fuel'][:train_size], label='train', color='#c24e5d')
plt.plot(df['Jet Fuel'][train_size:], label='test', color='#5dc24e')
plt.plot(jet.index[-100:], arima003_r_diff_exp, label='arima003', color='#5dc24e', linestyle='--')
plt.title('ARIMA(0,0,3) Jet Fuel Results Inverse Diff+Exp', size=15)
plt.legend()
plt.show()
arima_jet = arima003_r_diff_exp
len(arima_jet)
# BUG FIX: RMSE is sqrt(MSE); the original took sqrt of the MAE.
# NOTE(review): `test` is log-differenced while `arima_jet` is on the price
# scale — these scores mix scales; consider df['Jet Fuel'][train_size:] instead.
print('Test Mean Absolute Error:', mean_absolute_error(test, arima_jet))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(test, arima_jet)))
print('Test R2 Score: ', r2_score(test, arima_jet))
# !pip install fbprophet
import io, os, sys, setuptools, tokenize
from prophet import Prophet
# Prophet needs a plain 'ds'/'y' frame, so move Date back into a column.
df.reset_index(inplace=True)
df.head()
# Renaming the columns for Prophet compatability.
# IMPROVED: build the renamed frame in one step instead of calling
# rename(inplace=True) on a selection, which relies on the slice being a
# copy and can trigger pandas' SettingWithCopyWarning.
gas_prop = df[['Date', 'Gasoline']].rename(columns={'Date': 'ds', 'Gasoline': 'y'})
gas_prop.head()
# Same fixed chronological split used by the LSTM models.
# train_size = int(len(gasoline1) * 0.80)
train_size = 2906
train = gas_prop[:train_size]
test = gas_prop[train_size:]
print("Train shape: " + str(len(train)))
print("Test shape: " + str(len(test)))
test.head()
# fit model on the training period only
m = Prophet()
m.fit(train)
# populate forecast for the test-period dates
forecast = m.predict(test)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
forecast.shape
# plot forecast with uncertainty interval
m.plot(forecast);
# plot individual components of forecast: trend, weekly/yearly seasonality
m.plot_components(forecast);
# Overlay the Prophet test-period forecast on the full gasoline history.
plt.figure(figsize=(25,5))
plt.plot(df['Date'][:train_size],df['Gasoline'][:train_size], label='train', color='#e58a3d')
plt.plot(df['Date'][train_size:],df['Gasoline'][train_size:], label='test', color='#e53d98')
plt.plot(df['Date'][train_size:], forecast.yhat, label='prediction', color='#e53d98', linestyle='--')
plt.title('Prophet Gasoline Results', size=15)
plt.legend()
plt.show()
# Keep a copy for the final all-model comparison plots.
gas_prophet = forecast.yhat
len(gas_prophet)
test
# BUG FIX: RMSE is sqrt(MSE); the original printed sqrt(MAE) under the RMSE label.
print('Test Mean Absolute Error:', mean_absolute_error(test.y, gas_prophet))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(test.y, gas_prophet)))
print('Test R2 Score: ', r2_score(test.y, gas_prophet))
# !pip install fbprophet
df.head()
# Renaming the columns for Prophet compatability.
# IMPROVED: build the renamed frame in one step instead of calling
# rename(inplace=True) on a selection (avoids SettingWithCopyWarning).
die_prop = df[['Date', 'Diesel']].rename(columns={'Date': 'ds', 'Diesel': 'y'})
die_prop.head()
# Same fixed chronological split used by the LSTM models.
# train_size = int(len(gasoline1) * 0.80)
train_size = 2906
train = die_prop[:train_size]
test = die_prop[train_size:]
print("Train shape: " + str(len(train)))
print("Test shape: " + str(len(test)))
test.head()
# fit model on the training period only
m = Prophet()
m.fit(train)
# populate forecast for the test-period dates
forecast = m.predict(test)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
forecast.shape
# plot forecast with uncertainty interval
m.plot(forecast);
# plot individual components of forecast: trend, weekly/yearly seasonality
m.plot_components(forecast);
# Overlay the Prophet test-period forecast on the full diesel history.
plt.figure(figsize=(25,5))
plt.plot(df['Date'][:train_size],df['Diesel'][:train_size], label='train', color='#3f5b9c')
plt.plot(df['Date'][train_size:],df['Diesel'][train_size:], label='test', color='#9c3f5b')
plt.plot(df['Date'][train_size:], forecast.yhat, label='prediction', color='#9c3f5b', linestyle='--')
plt.title('Prophet Diesel Results', size=15)
plt.legend()
plt.show()
# Keep a copy for the final all-model comparison plots.
diesel_prophet = forecast.yhat
len(diesel_prophet)
# BUG FIX: RMSE is sqrt(MSE); the original printed sqrt(MAE) under the RMSE label.
print('Test Mean Absolute Error:', mean_absolute_error(test.y, diesel_prophet))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(test.y, diesel_prophet)))
print('Test R2 Score: ', r2_score(test.y, diesel_prophet))
# !pip install fbprophet
df.head()
# Renaming the columns for Prophet compatability.
# IMPROVED: build the renamed frame in one step instead of calling
# rename(inplace=True) on a selection (avoids SettingWithCopyWarning).
jet_prop = df[['Date', 'Jet Fuel']].rename(columns={'Date': 'ds', 'Jet Fuel': 'y'})
jet_prop.head()
# Same fixed chronological split used by the LSTM models.
# train_size = int(len(gasoline1) * 0.80)
train_size = 2906
train = jet_prop[:train_size]
test = jet_prop[train_size:]
print("Train shape: " + str(len(train)))
print("Test shape: " + str(len(test)))
test.head()
# fit model on the training period only
m = Prophet()
m.fit(train)
# populate forecast for the test-period dates
forecast = m.predict(test)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
forecast.shape
# plot forecast with uncertainty interval
m.plot(forecast);
# plot individual components of forecast: trend, weekly/yearly seasonality
m.plot_components(forecast);
# Overlay the Prophet test-period forecast on the full jet-fuel history.
plt.figure(figsize=(25,5))
plt.plot(df['Date'][:train_size],df['Jet Fuel'][:train_size], label='train', color='#c24e5d')
plt.plot(df['Date'][train_size:],df['Jet Fuel'][train_size:], label='test', color='#5dc24e')
plt.plot(df['Date'][train_size:], forecast.yhat, label='prediction', color='#5dc24e', linestyle='--')
plt.title('Prophet Jet Fuel Results', size=15)
plt.legend()
plt.show()
# Keep a copy for the final all-model comparison plots.
jet_prophet = forecast.yhat
len(jet_prophet)
# BUG FIX: RMSE is sqrt(MSE); the original printed sqrt(MAE) under the RMSE label.
print('Test Mean Absolute Error:', mean_absolute_error(test.y, jet_prophet))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(test.y, jet_prophet)))
print('Test R2 Score: ', r2_score(test.y, jet_prophet))
# Final comparison: overlay every model's test-period predictions per product.
# NOTE(review): the -69 / -100 slice lengths assume 69 LSTM test predictions
# and 100 ARIMA/Prophet test predictions — confirm against the split sizes.
plt.figure(figsize=(25,10))
# plt.plot(df['Date'][:train_size],df['Gasoline'][:train_size], label='train', color='#e58a3d')
plt.plot(df['Date'][train_size:],df['Gasoline'][train_size:], label='actual', color='#e58a3d', linewidth=3)
plt.plot(df['Date'][-69:], lstm_gas, label='lstm', linestyle='--', linewidth=2)
plt.plot(df['Date'][-69:], stlstm_gas, label='stacked lstm', linestyle='--', linewidth=2)
plt.plot(df['Date'][-100:], arima_gas, label='arima', linestyle='--', linewidth=2)
plt.plot(df['Date'][-100:], gas_prophet, label='fbprophet', linestyle='--', linewidth=2)
plt.title('All Models Gasoline', size=15)
plt.legend()
plt.show()
plt.figure(figsize=(25,10))
# plt.plot(df['Date'][:train_size],df['Diesel'][:train_size], label='train', color='#3f5b9c')
plt.plot(df['Date'][train_size:],df['Diesel'][train_size:], label='test', color='#3f5b9c', linewidth=3)
plt.plot(df['Date'][-69:], lstm_diesel, label='lstm', linestyle='--', linewidth=2)
plt.plot(df['Date'][-69:], stlstm_diesel, label='stacked lstm', linestyle='--', linewidth=2)
plt.plot(df['Date'][-100:], arima_diesel, label='arima', linestyle='--', linewidth=2)
plt.plot(df['Date'][-100:], diesel_prophet, label='fbprophet', linestyle='--', linewidth=2)
plt.title('All Models Diesel', size=15)
plt.legend()
plt.show()
plt.figure(figsize=(25,10))
# plt.plot(df['Date'][:train_size],df['Jet Fuel'][:train_size], label='train', color='#c24e5d')
plt.plot(df['Date'][train_size:],df['Jet Fuel'][train_size:], label='test', color='#c24e5d', linewidth=3)
plt.plot(df['Date'][-69:], lstm_jet, label='lstm', linestyle='--', linewidth=2)
plt.plot(df['Date'][-69:], stlstm_jet, label='stacked lstm', linestyle='--', linewidth=2)
plt.plot(df['Date'][-100:], arima_jet, label='arima', linestyle='--', linewidth=2)
plt.plot(df['Date'][-100:], jet_prophet, label='fbprophet', linestyle='--', linewidth=2)
plt.title('All Models Jet Fuel', size=15)
plt.legend()
plt.show()